#Nature's Kidneys: the Role of Wetland Reserve Easements in Restoring Water Quality
#Nicole Karwowski and Marin Skidmore
#Replication File
#2025

#Aggregate and collapse water quality data to HUC12

#open libraries
library(fst)
library(ggplot2)
library(raster)
library(sf)
library(dplyr)
library(mapview)

#working directory
setwd("C:/Users/16308/OneDrive - Montana State University/UW_Madison/Water quality/Replication Package")

#open cleaned SNAPD data file
wq<-read_fst("Data/SNAPD/Raw/SNAPD/_B_workflow/SNAPD.fst")

#open huc12 sites merger key
load("Data/SNAPD/Processed/sites_huc12.RData")

#check out data
summary(wq)

#merge huc 12 id with site MLI id
colnames(sites_huc12)
sites_huc12<-subset(as.data.frame(sites_huc12), select=c(1:3))
wq<-merge(wq, sites_huc12, by.x="MLI", by.y="MLI", all.x=TRUE, all.y=FALSE)

list(wq)

table(is.na(wq$huc12))
table(is.na(wq$tohuc))

wq<-subset(wq, !is.na(wq$huc12))

#CLEAN
#get rid of NA concentrations 
wq<-subset(wq, !is.na(wq$conc))

#how many are outliers?
table(wq$outlier_flag, useNA="always")

#outliers are flagged if a concentration is above the 99th or below the 1st percentile
#these are likely from mismeasurement or reporting error. 
outlier<-subset(wq, wq$outlier_flag=="potential_outlier")
summary(outlier$conc)

#get rid of outliers
table(wq$outlier_flag, useNA="always")
wq<-subset(wq, wq$outlier_flag=="not_flagged_as_outlier")

#how many are imputed? 
table(wq$impute_flag, useNA="always")

#create month variable
wq$YearMonth<-format(as.Date(wq$date), "%Y-%m")
wq$Month<-substring(wq$date, 6, 7)
wq$Year<-substring(wq$date, 1,4)
wq$YearMonth<-paste0(wq$Year, wq$Month, sep="")
wq<-wq %>% relocate(YearMonth, .after=date)
wq<-wq %>% relocate(Month, .after=YearMonth)
wq<-wq %>% relocate(Year, .after=YearMonth)

#count of readings by nutrient compound
table(wq$nutrient_parameter)

###############################################################################
#Collapse to huc12 nutrient 

Panel<-aggregate(conc~huc12+YearMonth+nutrient_parameter, data=wq, na.rm=TRUE, FUN=mean)

table(Panel$nutrient_parameter)

#reshape data from long to wide
Panel2<-reshape(Panel, idvar=c("huc12", "YearMonth"), 
                timevar="nutrient_parameter", 
                direction="wide")

Panel2$Year<-substring(Panel2$YearMonth, 1, 4)
Panel2$Month<-substring(Panel2$YearMonth, 5, 6)

Panel2<- Panel2 %>% relocate(Year, .after=YearMonth)
Panel2<- Panel2 %>% relocate(Month, .after=Year)

names(Panel2)
names(Panel2)<-gsub(pattern="conc.", replacement="", x=names(Panel2))

wq_huc12<-Panel2

#lets save it
save(wq_huc12, file="Data/SNAPD/Processed/SNAPwq_huc_ym.RData")

